This R Markdown document reproduces the main analyses and figures for tumor evolution in high-clonality LUAD (Lung Adenocarcinoma) dataset from the Sherlock-Lung study
manuscript title: Deciphering lung adenocarcinoma evolution and the role of LINE-1 retrotransposition
Fig. 3: Characterization of tumors with mutational signature ID2.
(You may need to install some packages if missing)
library(tidyverse)
library(ggplot2)
library(ggsankey)
library(scales)
library(hrbrthemes) # For theme_ipsum_rc
library(cowplot)
library(forcats)
library(ggrepel)
library(ggnewscale)
library(data.table)
library(ggasym)
library(ggpmisc)
library(broom)
library(ggsci)
library(conflicted)
conflicts_prefer(dplyr::filter)
conflicts_prefer(dplyr::rename)
pdfhr2()
# --- Define Helper Functions (if any custom) -----------------------------------
# Please source your custom functions here if needed, or place them in ./functions/
# source('./functions/your_custom_functions.R')
# --- Load Input Datasets ------------------------------------------------------
# All data files should be placed in the appropriate subfolders.
# Example folder structure:
# ./data/ - for main input files
# ./functions/ - for custom R functions
# ./output/ - for saving plots and results
# Replace the filenames below with your actual dataset filenames.
load('./data/BBsolution_final3_short.RData',verbose = T)
## Loading objects:
## BBsolution
## BBsolution4
## hq_samples
## wgs_groups_info
## wgs_groups_info2
load('./data/covdata0.RData',verbose = T)
## Loading objects:
## covdata0
load('./data/clinical_data.RData',verbose = T)
## Loading objects:
## clinical_data
load('./data/sherlock_data_all.RData',verbose = T)
## Loading objects:
## sherlock_data
## sherlock_data_full
## sherlock_overall
## sherlock_type_colors
## sherlock_freq
## sherlock_freq_hq
load('./data/sherlock_variable.RData',verbose = T)
## Loading objects:
## sherlock_variable
load('./data/sp_group_data.RData',verbose = T)
## Loading objects:
## sp_group_color
## sp_group_color_new
## sp_group_data
## sp_group_data2
load('./data/id2data.RData',verbose = T)
## Loading objects:
## id2data
## id2color
load('./data/tedata.RData',verbose = T)
## Loading objects:
## tedata
load('./data/suvdata.RData',verbose = T)
## Loading objects:
## suvdata
load('./data/RNASeq_Exp.RData',verbose = T)
## Loading objects:
## cdata3
## rdata1
## adata
load('./data/Signature_Lumidl_CN.RData',verbose = T)
## Loading objects:
## cn68_activity
## cn68_denovo_activity_nonsmoker
## cn68_denovo_activity_smoker
## cn68_signature
## cn68_decompsite
## cn68_signature_denovo_nonsmoker
## cn68_signature_denovo_smoker
## cn68_decompsite_denovo_nonsmoker
## cn68_decompsite_denovo_smoker
## cn68_denovo_mapping_nonsmoker
## cn68_denovo_mapping_detail_nonsmoker
load('./data/Signature_Lumidl_SV.RData',verbose = T)
## Loading objects:
## sv38_activity
## sv38_denovo_activity_nonsmoker
## sv38_denovo_activity_smoker
## sv38_signature
## sv38_decompsite
## sv38_signature_denovo_nonsmoker
## sv38_signature_denovo_smoker
## sv38_decompsite_denovo_nonsmoker
## sv38_decompsite_denovo_smoker
## sv38_denovo_mapping_nonsmoker
## sv38_denovo_mapping_detail_nonsmoker
load('./data/sherlock_profiles.RData',verbose = T)
## Loading objects:
## LCINS
## sherlock_dbs78_profile
## sherlock_sbs96_profile
## sherlock_id83_profile
## sherlock_sbs288_profile
## sherlock_cn68_profile
## sherlock_cn48_profile
## sherlock_sv38_profile
## sherlock_sv32_profile
# load function -----------------------------------------------------------
load('./data/ZTW_functions.RData',verbose = T)
## Loading objects:
## barcode2spg
## sp_group_color
## sp_group_color_new
## sp_group_lift
## sp_group_data
## sp_group_major_new
## sp_group_major
#source('./data/ZTW_functions.R')
source('./functions/Sherlock_functions.R')
# load analysis related data set
load('./data/Chronological_timing_short.RData',verbose = T)
## Loading objects:
## MRCAdata
## subclonesTimeAbs
## subclonesTimeAbsLinear
## mrate
## rateDeam
## timingclass_groups
## timingInfo
tmp <- colnames(MRCAdata)
MRCAdata <- MRCAdata %>% select(-SP_Group) %>% left_join(sp_group_data2) %>% select(-SP_Group_New) %>% select(one_of(tmp))
## analysis limited to luad only
hq_samples2 <- covdata0 %>% filter(Histology == 'Adenocarcinoma', Tumor_Barcode %in% hq_samples) %>% pull(Tumor_Barcode)
rm(list=c('hq_samples'))
Fig. 3a: Relationships between mutational signature ID2 presence and gene expression of tumor proliferation markers, analyzed from tumor and normal tissue RNA-Seq data. The horizontal line represents the significance threshold (FDR<0.05).
Fig. 3b: Pearson correlation between the number of deletions attributed to mutational signature ID2 and the gene expression of tumor proliferation markers. Pearson correlation coefficients and corresponding p-values are displayed above the plots.
# ID2 with all proliferation markers
genelist <- c('MKI67','TOP2A','MYBL2','BUB1','PLK1','CCNE1','CCNB1','BUB1','FOXM1')
tdata <- rdata1 %>% filter(Gene %in% genelist)
tdata <- id2data %>%
filter(Tumor_Barcode %in% hq_samples2) %>%
left_join(tdata) %>%
filter(!is.na(Exp))
fcdata <- tdata %>%
group_by(RNAseq_Type,Gene,ID2_Present) %>%
summarise(mvalue=median(Exp)) %>%
ungroup() %>%
pivot_wider(names_from = ID2_Present,values_from = mvalue) %>%
mutate(FC=Present/Absent)
tdata %>%
group_by(RNAseq_Type,Gene) %>%
do(tidy(wilcox.test(Exp~ID2_Present, data=.))) %>%
left_join(fcdata) %>%
group_by(RNAseq_Type) %>%
mutate(FDR=p.adjust(p.value)) %>%
ggplot(aes(log2(FC),-log10(FDR),fill=RNAseq_Type))+
geom_hline(yintercept = -log10(0.05),linetype=2,col='red',size=0.5)+
# geom_vline(xintercept = c(-5,5),linetype=2,col='blue',size=0.5)+
geom_point(pch=21,stroke=0.4,size=3)+
scale_fill_nejm()+
scale_x_continuous(breaks = pretty_breaks(n=6))+
scale_y_continuous(breaks = pretty_breaks(n = 6))+
ggrepel::geom_text_repel(aes(label=Gene),size=3,max.overlaps = 20,force = 20)+
labs(x='log2(Fold change)',y='-log10(FDR)',fill='RNA-Seq data')+
guides(fill = guide_legend(override.aes = list(size=3.5)))+
theme_ipsum_rc(base_size = 12,axis_title_just = 'm',axis_title_size = 14,grid='XY',ticks = T)+
theme(legend.position = 'top')+
panel_border(color = 'black',size = 0.5)+
coord_cartesian(clip = 'off')
#ggsave(filename = './output/ID2_Proliferation_Markers.pdf',width = 5,height = 5,device = cairo_pdf())
tdata %>%
left_join(covdata0) %>%
group_by(Gene,RNAseq_Type) %>%
do(tidy(lm(Exp~ID2_Present + Assigned_Population + Gender + Smoking + Tumor_Purity + Age, data=.))) %>%
filter(term=='ID2_PresentPresent') %>%
arrange((p.value))
## # A tibble: 16 × 7
## # Groups: Gene, RNAseq_Type [16]
## Gene RNAseq_Type term estimate std.error statistic p.value
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 FOXM1 Tumor ID2_PresentPresent 1.39 0.164 8.44 7.89e-16
## 2 PLK1 Tumor ID2_PresentPresent 1.22 0.147 8.29 2.34e-15
## 3 MYBL2 Tumor ID2_PresentPresent 1.72 0.212 8.13 6.90e-15
## 4 MKI67 Tumor ID2_PresentPresent 1.23 0.158 7.83 5.70e-14
## 5 TOP2A Tumor ID2_PresentPresent 1.37 0.184 7.44 7.70e-13
## 6 BUB1 Tumor ID2_PresentPresent 1.29 0.175 7.34 1.49e-12
## 7 CCNB1 Tumor ID2_PresentPresent 1.20 0.165 7.27 2.22e-12
## 8 CCNE1 Tumor ID2_PresentPresent 1.07 0.200 5.37 1.41e- 7
## 9 CCNB1 Normal ID2_PresentPresent 0.118 0.131 0.901 3.68e- 1
## 10 BUB1 Normal ID2_PresentPresent 0.123 0.144 0.853 3.95e- 1
## 11 MKI67 Normal ID2_PresentPresent 0.110 0.151 0.732 4.65e- 1
## 12 TOP2A Normal ID2_PresentPresent 0.124 0.170 0.726 4.69e- 1
## 13 FOXM1 Normal ID2_PresentPresent 0.0658 0.125 0.526 5.99e- 1
## 14 PLK1 Normal ID2_PresentPresent 0.0370 0.114 0.324 7.46e- 1
## 15 MYBL2 Normal ID2_PresentPresent 0.0552 0.180 0.307 7.59e- 1
## 16 CCNE1 Normal ID2_PresentPresent -0.0161 0.163 -0.0985 9.22e- 1
# association
tdata2 <- tdata %>%
filter(ID2>0, RNAseq_Type=='Tumor') %>%
select(Tumor_Barcode,ID2,Gene,Exp) %>%
pivot_wider(names_from = Gene,values_from = Exp) %>%
mutate(ID2=log2(ID2))
library(ggcorrplot)
corr <- round(cor(tdata2[,-1]), 1)
p.mat <- cor_pmat(tdata2[,-1])
ggcorrplot(corr)
ggcorrplot(corr,method = "circle",insig = "blank",
hc.order = TRUE,
type = "lower",
p.mat = p.mat,
lab = TRUE)
#ggsave(filename = './output/ID2_Proliferation_Markers2.pdf',width = 5,height = 5,device = cairo_pdf())
# box plot
tdata %>%
ggplot(aes(ID2_Present,Exp))+
geom_point(aes(fill=ID2_Present),pch=21,size=2.5,position = position_jitter(width = 0.2,height = 0))+
geom_boxplot(width=0.7,outlier.shape = NA,fill=NA,size=1)+
facet_grid(RNAseq_Type~Gene,scales ='free') +
scale_fill_manual(values = id2color)+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14,grid = 'Y')+
guides(fill='none')+#fill = guide_legend(override.aes=list(shape=21)),)+
labs(x='ID2 Mutational Signature',y='RNA-Seq gene expression log2(CPM)')+
theme(axis.text.x = element_text(angle = 45,hjust = 1,vjust = 1),panel.spacing = unit(0.1,'cm'))+
panel_border(color = 'black',linetype = 1)
#ggsave('./output/ID2_Proliferation_RNASeq.pdf',width = 10,height = 9,device = cairo_pdf)
tdata %>%
group_by(RNAseq_Type,Gene) %>%
do(tidy(wilcox.test(Exp~ID2_Present,data=.)))
## # A tibble: 16 × 6
## # Groups: RNAseq_Type, Gene [16]
## RNAseq_Type Gene statistic p.value method alternative
## <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 Normal BUB1 4557 7.38e- 1 Wilcoxon rank sum test with… two.sided
## 2 Normal CCNB1 4821 8.03e- 1 Wilcoxon rank sum test with… two.sided
## 3 Normal CCNE1 4833 7.82e- 1 Wilcoxon rank sum test with… two.sided
## 4 Normal FOXM1 4977 5.51e- 1 Wilcoxon rank sum test with… two.sided
## 5 Normal MKI67 4691 9.71e- 1 Wilcoxon rank sum test with… two.sided
## 6 Normal MYBL2 4769 8.93e- 1 Wilcoxon rank sum test with… two.sided
## 7 Normal PLK1 4868 7.23e- 1 Wilcoxon rank sum test with… two.sided
## 8 Normal TOP2A 4872 7.17e- 1 Wilcoxon rank sum test with… two.sided
## 9 Tumor BUB1 5343 4.27e-13 Wilcoxon rank sum test with… two.sided
## 10 Tumor CCNB1 5250 1.87e-13 Wilcoxon rank sum test with… two.sided
## 11 Tumor CCNE1 6497 4.37e- 9 Wilcoxon rank sum test with… two.sided
## 12 Tumor FOXM1 4669 8.20e-16 Wilcoxon rank sum test with… two.sided
## 13 Tumor MKI67 5262 2.08e-13 Wilcoxon rank sum test with… two.sided
## 14 Tumor MYBL2 4765 2.08e-15 Wilcoxon rank sum test with… two.sided
## 15 Tumor PLK1 4885 6.52e-15 Wilcoxon rank sum test with… two.sided
## 16 Tumor TOP2A 5254 1.94e-13 Wilcoxon rank sum test with… two.sided
# scatter plot
tmp <- tdata %>%
filter(ID2>0,RNAseq_Type=='Tumor') %>%
group_by(Gene) %>%
do(tidy(cor.test(log2(.$ID2),.$Exp))) %>%
ungroup() %>%
arrange(p.value) %>%
mutate(label=paste0(Gene,'\nR=',round(estimate,2),'; P=',scientific(p.value))) %>%
select(Gene,label) %>%
mutate(label=fct_inorder(label))
tdata %>%
filter(ID2>0,RNAseq_Type=='Tumor') %>%
left_join(tmp) %>%
ggplot(aes(log2(ID2),Exp))+
geom_point(pch=21,stroke=0.5,fill=id2color[2],size=2)+
facet_wrap(~label,nrow = 2)+
geom_smooth(method = 'lm')+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14,grid = 'Y',strip_text_face = 'bold')+
labs(x='Number of ID2 deletions (log2)',y='RNA-Seq expression log2(CPM)')+
theme(panel.spacing = unit(0.2,'lines'),strip.text.x = element_text(face = 'plain',hjust = 0.5))+
coord_cartesian(clip = 'off')+
panel_border(color = 'black',linetype = 1,size=0.5)+
guides(fill="none")
#ggsave('./output/ID2_Proliferation_RNASeq_tumor.pdf',width = 8,height = 5.5,device = cairo_pdf)
tmp <- tdata %>%
filter(ID2>0,RNAseq_Type=='Normal') %>%
group_by(Gene) %>%
do(tidy(cor.test(log2(.$ID2),.$Exp))) %>%
ungroup() %>%
arrange(p.value) %>%
mutate(label=paste0(Gene,'\nR=',round(estimate,2),'; P=',round(p.value,2))) %>%
select(Gene,label) %>%
mutate(label=fct_inorder(label))
tdata %>%
filter(ID2>0,RNAseq_Type=='Normal') %>%
left_join(tmp) %>%
ggplot(aes(log2(ID2),Exp))+
geom_point(pch=21,stroke=0.5,fill=id2color[2],size=2)+
facet_wrap(~label,nrow = 2)+
geom_smooth(method = 'lm')+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14,grid = 'Y',strip_text_face = 'bold')+
labs(x='Number of ID2 deletions (log2)',y='RNA-Seq expression log2(CPM)')+
theme(panel.spacing = unit(0.2,'lines'),strip.text.x = element_text(face = 'plain',hjust = 0.5))+
coord_cartesian(clip = 'off')+
panel_border(color = 'black',linetype = 1,size=0.5)+
guides(fill="none")
#ggsave('./output/ID2_Proliferation_RNASeq_normal.pdf',width = 8,height = 5.5,device = cairo_pdf)
Fig. 3c: Kaplan-Meier survival curves for overall survival, stratified by the presence of mutational signature ID2. Significance p-values and Hazard Ratios (HRs) were calculated using two-sided Cox proportional-hazards regression, adjusting for age, sex, smoking, and tumor stage. Numbers in brackets indicate the number of patients.
# ID2 survival
source('./functions/Survival_function.R')
suvdata <- suvdata %>% left_join(wgs_groups_info %>% select(Tumor_Barcode,SP_Group))
suvdata <- suvdata %>%
left_join(
sherlock_data_full %>% filter(Gene=='TP53',Type=='Mutation_Driver') %>% select(Tumor_Barcode,TP53_Status=Alteration) %>% mutate(TP53_Status = factor(TP53_Status, levels=c('No','Yes')))
)
suvdata_tmp <- id2data %>%
rename(Key = ID2_Present) %>%
left_join(suvdata) %>%
mutate(Key = factor(Key, levels = c('Absent','Present'))) %>%
filter(Tumor_Barcode %in% hq_samples2)
#mutate(Survival_Month = if_else(Survival_Month > 60, NA, Survival_Month))
sinfo1 <- SurvZTWms(suvdata_input = suvdata_tmp,plot = TRUE,keyname='Mutational Signature ID2: ',gcolors = as.character(id2color),pvalsize = 4,width = 5,height = 5,filename = 'ID2-survival.pdf')
## # A tibble: 1 × 3
## term p.value lab
## <chr> <dbl> <chr>
## 1 Present 0.00208 "Present:\nP = 0.0021\nHR = 1.8 (95% CI: 1.2-2.6)"
## theme(axis.text.x=element_text(hjust=c(0, rep(0.5, 4), 1))) +
## theme(axis.text.y=element_text(vjust=c(0, rep(0.5, 4), 1)))
print(sinfo1)
## # A tibble: 1 × 3
## term p.value lab
## <chr> <dbl> <chr>
## 1 Present 0.00208 "Present:\nP = 0.0021\nHR = 1.8 (95% CI: 1.2-2.6)"
sinfo2 <- SurvZTWms_tp53(suvdata_input = suvdata_tmp,plot = TRUE,keyname='Mutational Signature ID2: ',gcolors = as.character(id2color),pvalsize = 4,width = 5,height = 4,filename = 'ID2-survival-tp53.pdf')
## theme(axis.text.x=element_text(hjust=c(0, rep(0.5, 4), 1))) +
## theme(axis.text.y=element_text(vjust=c(0, rep(0.5, 4), 1)))
print(sinfo2)
## # A tibble: 1 × 3
## term p.value lab
## <chr> <dbl> <chr>
## 1 Present 0.0328 "Present:\nP = 0.033\nHR = 1.5 (95% CI: 1-2.2)"
## check the output pdf file for the survival plots
Odds ratios and p-values from the Fisher exact test are shown above the plot.
clinical_data %>% select(Tumor_Barcode,Metastasis_old)
## # A tibble: 1,217 × 2
## Tumor_Barcode Metastasis_old
## <chr> <chr>
## 1 IGC-02-1001-T03 Yes
## 2 IGC-02-1016-T01 Yes
## 3 IGC-02-1017-T01 Yes
## 4 IGC-02-1025-T01 Yes
## 5 IGC-02-1029-T01 No
## 6 IGC-02-1074-T01 No
## 7 IGC-02-1097-T01 Yes
## 8 IGC-02-1105-T01 Yes
## 9 IGC-02-1169-T01 No
## 10 IGC-02-1194-T01 Yes
## # ℹ 1,207 more rows
tdata <- id2data %>%
left_join(clinical_data %>% select(Tumor_Barcode,Metastasis=Metastasis_after_diagnosis) %>% mutate(Metastasis = factor(Metastasis, levels = c('No','Yes')))
) %>%
filter(!is.na(Metastasis)) %>%
filter(Tumor_Barcode %in% hq_samples2) %>%
left_join(sp_group_data2) %>%
left_join(
sherlock_data_full %>% filter(Gene=='TP53',Type=='Mutation_Driver') %>% select(Tumor_Barcode,TP53_Status=Alteration) %>% mutate(TP53_Status = factor(TP53_Status, levels=c('No','Yes'),labels=c('TP53 wildtype','TP53 mutant')))
)
tdata %>% do(tidy(fisher.test(.$ID2_Present,.$Metastasis)))
## # A tibble: 1 × 6
## estimate p.value conf.low conf.high method alternative
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2.45 0.00558 1.24 4.78 Fisher's Exact Test for Count… two.sided
tdata %>% group_by(TP53_Status) %>% do(tidy(fisher.test(.$ID2_Present,.$Metastasis)))
## # A tibble: 2 × 7
## # Groups: TP53_Status [2]
## TP53_Status estimate p.value conf.low conf.high method alternative
## <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 TP53 wildtype 5.25 0.000824 1.81 15.7 Fisher's Exact… two.sided
## 2 TP53 mutant 1.09 1 0.403 2.85 Fisher's Exact… two.sided
tdata %>% group_by(SP_Group_New) %>% do(tidy(fisher.test(.$ID2_Present,.$Metastasis)))
## # A tibble: 4 × 7
## # Groups: SP_Group_New [4]
## SP_Group_New estimate p.value conf.low conf.high method alternative
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 AS_N 0 1 0 190. Fisher's Exact T… two.sided
## 2 EU_N 2.76 0.0747 0.832 8.89 Fisher's Exact T… two.sided
## 3 EU_S 0.702 0.585 0.210 2.37 Fisher's Exact T… two.sided
## 4 Others 0.871 1 0.107 5.92 Fisher's Exact T… two.sided
tdata %>% filter(ID2>0) %>% do(tidy(wilcox.test(ID2~Metastasis,data=.)))
## # A tibble: 1 × 4
## statistic p.value method alternative
## <dbl> <dbl> <chr> <chr>
## 1 1815 0.0000274 Wilcoxon rank sum test with continuity correc… two.sided
my_comparisons = list(c('Yes','No'))
#myggstyle()
tdata %>%
filter(ID2>0) %>%
ggplot(aes(Metastasis,log2(ID2)))+
geom_point(aes(fill=Metastasis),pch=21,size=3,position = position_jitter(width = 0.2,height = 0),stroke=0.2)+
geom_boxplot(width=0.6,outlier.shape = NA,fill=NA,col=ncicolpal[1],size=0.6)+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14)+
theme(axis.text.x = element_text(angle = 45,hjust = 1,vjust = 1),panel.spacing = unit(0.2, "lines"),strip.background = element_rect(fill = 'gray95'),strip.text = element_text(hjust = 0.5),plot.margin = margin(4,4,4,4))+
labs(x = "Tumor Metastasis", y = 'ID2 deletions (log2)')+
scale_fill_manual(values = id2color)+
guides(fill="none")+
#facet_wrap(~TP53_Status,nrow = 1)+
panel_border(color = 'black',linetype = 1)+
stat_compare_means(comparisons = my_comparisons)
#ggsave(file='ID2_metastasis.pdf',width = 2,height = 6,device = cairo_pdf)
# overall
barplot_fisher(tdata,'ID2_Present','Metastasis',var1lab = 'Mutational signature ID2',var2lab = 'Tumor metastasis')
## [1] "Mutational signature ID2"
## # A tibble: 1 × 6
## estimate p.value conf.low conf.high method alternative
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2.45 0.00558 1.24 4.78 Fisher's Exact Test for Count… two.sided
# TP53 wildtype
barplot_fisher(tdata %>% filter(TP53_Status=='TP53 wildtype'),'ID2_Present','Metastasis',var1lab = 'Mutational signature ID2',var2lab = 'Tumor metastasis')
## [1] "Mutational signature ID2"
## # A tibble: 1 × 6
## estimate p.value conf.low conf.high method alternative
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 5.25 0.000824 1.81 15.7 Fisher's Exact Test for Coun… two.sided
# TP53 mutant
barplot_fisher(tdata %>% filter(TP53_Status=='TP53 mutant'),'ID2_Present','Metastasis',var1lab = 'Mutational signature ID2',var2lab = 'Tumor metastasis')
## [1] "Mutational signature ID2"
## # A tibble: 1 × 6
## estimate p.value conf.low conf.high method alternative
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 1.09 1 0.403 2.85 Fisher's Exact Test for Count… two.sided
tdata %>%
left_join(covdata0) %>%
filter(Smoking=='Non-Smoker') %>%
do(tidy(glm(Metastasis ~ ID2_Present + TP53_Status + Tumor_Purity + Age + Gender, family='binomial',data=.)))
## # A tibble: 6 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1.44 1.53 -0.947 0.344
## 2 ID2_PresentPresent 1.26 0.474 2.67 0.00767
## 3 TP53_StatusTP53 mutant 0.569 0.399 1.43 0.153
## 4 Tumor_Purity -0.220 1.42 -0.155 0.877
## 5 Age -0.0129 0.0188 -0.684 0.494
## 6 GenderFemale 0.381 0.481 0.794 0.427
Enrichment of genomic alterations (WGD=whole genome doubling; SCNA=somatic copy number alterations) in tumors with mutational signature ID2, determined through logistic regression and adjusted for ancestry, sex, smoking status, age, and tumor purity. The horizontal lines represent significance thresholds (FDR<0.05 in orange and FDR<0.01 in red)
drglist <- readRDS('./data/drivers_intogene.RDS') %>% pull(symbol)
drglist <- unique(c(drglist,c('RET','ALK','AXL','NRG1','MET','FGFR2','ROS1','RB1','ERBB4','EGFR')))
drglist <- c(drglist,paste0(drglist,'-Amp'),paste0(drglist,'-Del'))
features=c('WGD_Status','Distal_Proximal','A3A_or_A3B','Kataegis','HRDetect_Status','Forte','All_L1')
tdata <- sherlock_data_full %>%
mutate(Type=if_else(Gene=='All_L1','Overall_Feature',Type)) %>%
filter(Tumor_Barcode %in% hq_samples2,Gene %in% c(features,drglist),!(Type %in% c('Gene_Alterations','Multiple_Mutations','Pathogenic_Germline'))) %>%
filter(Tumor_Barcode %in% hq_samples2) %>%
mutate(Alteration = if_else(Alteration == 'WGD','Yes',if_else(Alteration == 'nWGD','No',Alteration))) %>%
mutate(Alteration = if_else(Alteration == 'A3A','Yes',if_else(Alteration == 'A3B','No',Alteration))) %>%
left_join(id2data) %>%
mutate(Type=factor(Type,levels=c('Overall_Feature','Mutation_Driver','SCNA_Focal_Gene','Fusion')))
tresult <- tdata %>%
left_join(covdata0) %>%
group_by(Type,Gene) %>%
mutate(ID2_Present= as.integer(as.factor(ID2_Present))-1) %>%
do(tresult = safely(glm)(ID2_Present ~ Alteration + Assigned_Population + Gender + Age + Smoking + Tumor_Purity, family = "binomial", data=. )) %>%
mutate(tresult_null = map_lgl(tresult['result'], is.null)) %>%
filter(!tresult_null) %>%
mutate(fit = list(tidy(tresult[['result']]))) %>%
select(Type,Gene,fit) %>%
unnest(cols = c(fit)) %>%
ungroup() %>%
arrange(p.value) %>%
filter(term!='(Intercept)', term=='AlterationYes') %>%
filter(abs(estimate)<10) %>%
mutate(FDR=p.adjust(p.value))
tresult %>%
filter(!is.na(Type)) %>%
ggplot(aes((estimate),-log10(FDR)))+
geom_hline(yintercept = -log10(0.01),linetype=2,col='red',size=0.5)+
geom_hline(yintercept = -log10(0.05),linetype=2,col='orange',size=0.5)+
geom_point(aes(fill=Type),pch=21,stroke=0.1,col='black',size=4)+
scale_x_continuous(breaks = pretty_breaks())+
scale_y_continuous(breaks = pretty_breaks())+
ggrepel::geom_text_repel(data=tresult %>%filter(FDR<0.05), aes(label=Gene),force = 10,size=4)+
scale_fill_manual(values = ncicolpal)+
labs(x='Regression coefficient for tumor latency',y='-log10(FDR)',fill='Alterations or features')+
guides(fill = guide_legend(override.aes = list(size=5)))+
theme_ipsum_rc(base_size = 14,axis_title_just = 'm',axis_title_size = 16,grid='XY',ticks = T)+
panel_border(color = 'black',size = 0.5)
#ggsave(filename = './output/ID2_hq_tumor_genomic_association.pdf',width = 7.5,height = 5,device = cairo_pdf)
Distribution of estimated hypoxia scores between tumors with ID2 signatures and those without. P-values from the Wilcoxon rank-sum test are displayed above the plot.
# ID2 and hypoxia
library(ggdist)
tresult <- sherlock_variable %>%
left_join(id2data) %>%
group_by(name) %>%
do(tidy(wilcox.test(value~ID2_Present,data=.)))
tresult %>%
filter(str_detect(name,'hypoxia_score'))
## # A tibble: 7 × 5
## # Groups: name [7]
## name statistic p.value method alternative
## <chr> <dbl> <dbl> <chr> <chr>
## 1 hypoxia_score_Combined_GeneSet 28956. 7.54e-12 Wilcoxon r… two.sided
## 2 hypoxia_score_buffa 23364 6.67e-20 Wilcoxon r… two.sided
## 3 hypoxia_score_elvidge_hypoxia_all 40244 2.52e- 2 Wilcoxon r… two.sided
## 4 hypoxia_score_elvidge_hypoxia_short 38464. 2.96e- 3 Wilcoxon r… two.sided
## 5 hypoxia_score_ragnum 20218. 1.92e-25 Wilcoxon r… two.sided
## 6 hypoxia_score_sorenson 31078 2.13e- 9 Wilcoxon r… two.sided
## 7 hypoxia_score_west 25795 3.73e-16 Wilcoxon r… two.sided
tdata <- sherlock_variable %>%
filter(str_detect(name,'hypoxia_score')) %>%
filter(name=='hypoxia_score_Combined_GeneSet') %>%
left_join(id2data) %>%
filter(Tumor_Barcode %in% hq_samples2) %>%
left_join(sp_group_data2) %>%
ungroup()
tdata <- tdata %>% mutate(SP_Group_New='ALL') %>% bind_rows(tdata) #%>% filter(SP_Group_New!='Others')
tdata %>%
ggplot(aes(SP_Group_New,value,fill=ID2_Present))+
stat_eye(side = "left",position = position_dodge(width = 0.2),col='black')+
scale_fill_manual(values = id2color)+
scale_y_continuous(breaks = pretty_breaks())+
theme_ipsum_rc(base_size = 18,axis_title_just = 'm',axis_title_size = 22)+
labs(x = "", y = 'Hypoxia scorer',fill='ID2 mutational signature')+
#guides(fill="none")+
theme(legend.position = 'top')+
panel_border(color = 'black',linetype = 1)
#ggsave(file='./output/ID2_hypoxia_score.pdf',width = 9,height = 9,device = cairo_pdf)
tdata %>% group_by(SP_Group_New) %>% do(tidy(wilcox.test(value~ID2_Present,data=.)))
## # A tibble: 5 × 5
## # Groups: SP_Group_New [5]
## SP_Group_New statistic p.value method alternative
## <chr> <dbl> <dbl> <chr> <chr>
## 1 ALL 7070. 0.000000214 Wilcoxon rank sum test with co… two.sided
## 2 AS_N 856. 0.0118 Wilcoxon rank sum test with co… two.sided
## 3 EU_N 536 0.00622 Wilcoxon rank sum test with co… two.sided
## 4 EU_S 408. 0.0259 Wilcoxon rank sum test with co… two.sided
## 5 Others 113 0.306 Wilcoxon rank sum test with co… two.sided
# ATR - CHECK1 vs ID2 -------------------------------------------------------------------------
tdata <- rdata1 %>% filter(Gene %in% c('CHEK1','CHEK2'))
tdata <- id2data %>%
left_join(tdata) %>%
filter(!is.na(Exp),!is.na(ID2)) %>%
left_join(wgs_groups_info)
tdata %>%
filter(RNAseq_Type=='Tumor') %>%
ggplot(aes(ID2_Present,Exp))+
geom_point(aes(fill=ID2_Present),pch=21,size=2.5,position = position_jitter(width = 0.15,height = 0))+
geom_boxplot(width=0.7,outlier.shape = NA,fill=NA)+
facet_grid(RNAseq_Type~Gene,scales = 'free') +
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14,grid = 'Y')+
guides(fill = guide_legend(override.aes=list(shape=21)))+
labs(x='ID2 mutational signature',y='RNA-Seq Expression log2(CPM)')+
theme(axis.text.x = element_text(angle = 45,hjust = 1,vjust = 1),panel.spacing = unit(0.25, "cm"))+
panel_border(color = 'black',linetype = 1)+
scale_fill_manual(values = c("#01665e","#ff7f00"))+
guides(fill="none")
#ggsave('./output/ID2_Checkpoint_RNASeq.pdf',width = 5,height = 8,device = cairo_pdf)
tdata %>%
group_by(RNAseq_Type,Gene) %>%
do(tidy(wilcox.test(Exp~ID2_Present,data=.))) %>%
arrange(p.value)
## # A tibble: 4 × 6
## # Groups: RNAseq_Type, Gene [4]
## RNAseq_Type Gene statistic p.value method alternative
## <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 Tumor CHEK1 20147 1.56e-25 Wilcoxon rank sum test with … two.sided
## 2 Tumor CHEK2 23793 3.40e-19 Wilcoxon rank sum test with … two.sided
## 3 Normal CHEK1 23218 1.51e- 1 Wilcoxon rank sum test with … two.sided
## 4 Normal CHEK2 22859 2.38e- 1 Wilcoxon rank sum test with … two.sided
tdata %>%
filter(ID2>0) %>%
filter(RNAseq_Type=='Tumor') %>%
ggplot(aes(log2(ID2),Exp))+
geom_point(aes(fill=RNAseq_Type),pch=21,stroke=0.2,size=2.5)+
geom_smooth(method = 'lm')+
facet_wrap(~Gene,scales = 'free',nrow = 2) +
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14,grid = 'Y',strip_text_face = 'bold')+
labs(y='RNA-Seq Expression log2(CPM)',x='ID2 mutations (log2)')+
theme(panel.spacing = unit(0.5,'lines'))+
scale_fill_manual(values = c("#ff7f00"))+
panel_border(color = 'black',linetype = 1)+
guides(fill="none")
#ggsave('./output/ID2_Checkpoint_RNASeq2.pdf',width = 4.5,height = 8,device = cairo_pdf)
tdata %>%
group_by(RNAseq_Type,Gene) %>%
filter(ID2>0) %>%
do(tidy(cor.test(.$Exp,log2(.$ID2),data=.))) %>%
arrange(p.value)
## # A tibble: 4 × 10
## # Groups: RNAseq_Type, Gene [4]
## RNAseq_Type Gene estimate statistic p.value parameter conf.low conf.high
## <chr> <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
## 1 Tumor CHEK1 0.510 12.5 1.07e-30 440 0.438 0.576
## 2 Tumor CHEK2 0.458 10.8 2.47e-24 440 0.381 0.529
## 3 Normal CHEK2 -0.164 -2.97 3.21e- 3 321 -0.268 -0.0554
## 4 Normal CHEK1 -0.140 -2.54 1.17e- 2 321 -0.246 -0.0316
## # ℹ 2 more variables: method <chr>, alternative <chr>
# tdata %>%
# mutate(Exp=2^Exp) %>%
# group_by(RNAseq_Type,ID2) %>%
# summarise(mvalue=median(Exp))
#
# Figure xxx: ID2 association with SV count and signature -----------------------------
my_comparisons <- list(c("Absent",'Present'))
tdata <- sherlock_variable %>%
filter(str_detect(name,'SV')) %>%
left_join(id2data) %>%
filter(Tumor_Barcode %in% hq_samples2)
tdata %>% group_by(name) %>% do(tidy(wilcox.test(log2(value+1) ~ ID2_Present, data=.)))
## # A tibble: 3 × 5
## # Groups: name [3]
## name statistic p.value method alternative
## <chr> <dbl> <dbl> <chr> <chr>
## 1 SV 15366. 3.57e- 9 Wilcoxon rank sum test with continu… two.sided
## 2 SV_Complex 23420. 6.54e- 1 Wilcoxon rank sum test with continu… two.sided
## 3 SV_Simple 9290. 1.26e-23 Wilcoxon rank sum test with continu… two.sided
tdata %>%
ggplot(aes(ID2_Present,log2(value+1),fill=ID2_Present))+
geom_boxplot(width=0.6,outlier.shape = NA,alpha =0.25)+
geom_point(pch=21,size=2.5,position = position_jitter(width = 0.15,height = 0),color="white",stroke=0.05)+
scale_fill_manual(values = id2color)+
facet_wrap(~name)+
scale_y_continuous(breaks = pretty_breaks(n = 7))+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14)+
theme(plot.margin = margin(4,4,4,4),panel.spacing = unit(0.1,'cm'),strip.text.x = element_text(hjust = 0.5,face = 4))+
labs(x = "Mutational signature ID2", y = 'log2(SV count + 1)')+
guides(fill="none")+
panel_border(color = 'black',linetype = 1)+
stat_compare_means(comparisons = my_comparisons)
#ggsave(filename = 'SV_ID2_present_vs_absent.pdf',width = 5,height = 6,device = cairo_pdf)
# SV profiles
tdata <- sherlock_sv32_profile %>%
separate(col = MutationType,into = c('a','b','c'),sep = '_') %>% group_by(Tumor_Barcode,a,b) %>% summarise(Contribution = sum(Contribution)) %>% ungroup() %>%
left_join(id2data) %>%
filter(Tumor_Barcode %in% hq_samples2)
tdata %>% group_by(a,b) %>% do(tidy(wilcox.test(log2(Contribution+1) ~ ID2_Present, data=.))) %>% ungroup()%>% arrange(p.value) %>% mutate(fdr=p.adjust(p.value)) %>% View()
tdata %>%
ggplot(aes(ID2_Present,log2(Contribution+1),fill=ID2_Present))+
geom_boxplot(width=0.6,outlier.shape = NA,alpha =0.25)+
geom_point(pch=21,size=2,position = position_jitter(width = 0.15,height = 0),color="white",stroke=0.05)+
scale_fill_manual(values = id2color)+
facet_grid(a~b)+
scale_y_continuous(breaks = pretty_breaks(n = 7))+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14)+
theme(plot.margin = margin(4,4,4,4),panel.spacing = unit(0.1,'cm'),strip.text.x = element_text(hjust = 0.5,face = 4))+
labs(x = "Mutational signature ID2", y = 'log2(SV count + 1)')+
guides(fill="none")+
panel_border(color = 'black',linetype = 1)+
stat_compare_means(comparisons = my_comparisons)
#ggsave(filename = './output/SV_subtypes_ID2_present_vs_absent.pdf',width = 6,height = 8,device = cairo_pdf)
my_comparisons <- list(c("Absent",'Present'))
tdata <- sherlock_variable %>%
filter(str_detect(name,'SV')) %>%
left_join(id2data) %>%
filter(Tumor_Barcode %in% hq_samples2) %>%
left_join(covdata0 %>% select(Tumor_Barcode, Smoking)) %>%
filter(Smoking!='Unknown') %>%
filter(name=='SV')
tdata %>% group_by(Smoking,name) %>% do(tidy(wilcox.test(log2(value+1) ~ ID2_Present, data=.)))
## # A tibble: 2 × 6
## # Groups: Smoking, name [2]
## Smoking name statistic p.value method alternative
## <fct> <chr> <dbl> <dbl> <chr> <chr>
## 1 Smoker SV 1132 0.000000329 Wilcoxon rank sum test wit… two.sided
## 2 Non-Smoker SV 8369 0.0393 Wilcoxon rank sum test wit… two.sided
tdata %>%
ggplot(aes(ID2_Present,log2(value+1),fill=ID2_Present))+
geom_boxplot(width=0.6,outlier.shape = NA,alpha =0.25)+
geom_point(pch=21,size=2.5,position = position_jitter(width = 0.15,height = 0),color="white",stroke=0.05)+
scale_fill_manual(values = id2color)+
facet_wrap(~Smoking)+
scale_y_continuous(breaks = pretty_breaks(n = 7))+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14)+
theme(plot.margin = margin(4,4,4,4),panel.spacing = unit(0.1,'cm'),strip.text.x = element_text(hjust = 0.5,face = 4))+
labs(x = "Mutational signature ID2", y = 'log2(SV count + 1)')+
guides(fill="none")+
panel_border(color = 'black',linetype = 1)+
stat_compare_means(comparisons = my_comparisons)
#ggsave(filename = './output/SV_ID2_present_vs_absent_Smoking.pdf',width = 4,height = 6,device = cairo_pdf)
# # Figure xxx: ID2 association with PGA and CNV signature ----------------
my_comparisons <- list(c("Absent",'Present'))
load('./data/sherlock_PGA.RData',verbose = T)
## Loading objects:
## sherlock_PGA
tdata <- sherlock_PGA %>%
left_join(id2data) %>%
filter(Tumor_Barcode %in% hq_samples2)
tdata %>% do(tidy(wilcox.test(PGA_WGD ~ ID2_Present, data=.)))
## # A tibble: 1 × 4
## statistic p.value method alternative
## <dbl> <dbl> <chr> <chr>
## 1 15293 0.00000000265 Wilcoxon rank sum test with continuity co… two.sided
tdata %>%
ggplot(aes(ID2_Present,PGA_WGD,fill=ID2_Present))+
geom_boxplot(width=0.6,outlier.shape = NA,alpha =0.25)+
geom_point(pch=21,size=2.5,position = position_jitter(width = 0.15,height = 0),color="white",stroke=0.05)+
scale_fill_manual(values = id2color)+
scale_y_continuous(breaks = pretty_breaks(n = 7),labels = percent_format())+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14)+
theme(plot.margin = margin(4,4,4,4),panel.spacing = unit(0.1,'cm'),strip.text.x = element_text(hjust = 0.5,face = 4))+
labs(x = "Mutational signature ID2", y = 'Percent genome altered by copy number')+
guides(fill="none")+
panel_border(color = 'black',linetype = 1)+
stat_compare_means(comparisons = my_comparisons)
#ggsave(filename = './output/PGA_ID2_present_vs_absent.pdf',width = 2.7,height = 6,device = cairo_pdf)
my_comparisons <- list(c("Absent",'Present'))
tdata <- sherlock_PGA %>%
left_join(id2data) %>%
filter(Tumor_Barcode %in% hq_samples2) %>%
left_join(covdata0 %>% select(Tumor_Barcode, Smoking)) %>%
filter(Smoking!='Unknown')
tdata %>% do(tidy(wilcox.test(PGA_WGD ~ ID2_Present, data=.)))
## # A tibble: 1 × 4
## statistic p.value method alternative
## <dbl> <dbl> <chr> <chr>
## 1 15287 0.00000000302 Wilcoxon rank sum test with continuity co… two.sided
tdata %>%
ggplot(aes(ID2_Present,PGA_WGD,fill=ID2_Present))+
geom_boxplot(width=0.6,outlier.shape = NA,alpha =0.25)+
geom_point(pch=21,size=2.5,position = position_jitter(width = 0.15,height = 0),color="white",stroke=0.05)+
facet_wrap(~Smoking)+
scale_fill_manual(values = id2color)+
scale_y_continuous(breaks = pretty_breaks(n = 7),labels = percent_format())+
theme_ipsum_rc(axis_title_just = 'm',axis_title_size = 14)+
theme(plot.margin = margin(4,4,4,4),panel.spacing = unit(0.1,'cm'),strip.text.x = element_text(hjust = 0.5,face = 4))+
labs(x = "Mutational signature ID2", y = 'Percent genome altered by copy number')+
guides(fill="none")+
panel_border(color = 'black',linetype = 1)+
stat_compare_means(comparisons = my_comparisons)
#ggsave(filename = './output/PGA_ID2_present_vs_absent_smoking.pdf',width = 4,height = 6,device = cairo_pdf)
# Signature
# Figure 2c-d ---------------------------------------------------------------
nmutation_input <- 0
conflicts_prefer(dplyr::lag)
source('./functions/Sigvisualfunc.R')
load('./data/Sigvisualfunc.RData')
sigcol <- ncicolpal[1:length(colnames(cn68_activity)[-1])]
names(sigcol) <- colnames(cn68_activity)[-1]
nmutation_input <- 0
tmp <- id2data %>% filter(Tumor_Barcode %in% hq_samples2,ID2_Present == "Present")%>% pull(Tumor_Barcode)
sigdata <- cn68_activity %>% rename(Samples=Tumor_Barcode) %>% filter(Samples %in% tmp)
data1 <- prevalence_plot(sigdata = sigdata,nmutation = nmutation_input,output_plot = paste0('./output/ID2_present_prevalence.pdf'),colset = sigcol,rel_widths = c(1,4))
tmp <- id2data %>% filter(Tumor_Barcode %in% hq_samples2,ID2_Present == "Absent")%>% pull(Tumor_Barcode)
sigdata <- cn68_activity %>% rename(Samples=Tumor_Barcode) %>% filter(Samples %in% tmp)
data2 <- prevalence_plot(sigdata = sigdata,nmutation = nmutation_input,output_plot = paste0('./output/ID2_absent_prevalence.pdf'),colset = sigcol,rel_widths = c(1,4))
tdata <- bind_rows(
data1$freq_data %>% mutate(Group='Present'),
data2$freq_data %>% mutate(Group='Absent')
)
#myggstyle()
plotdata <- tdata %>%
filter(Type == 'Prevalence by samples') %>%
arrange(desc(Value)) %>%
mutate(Signature = fct_inorder(Signature)) %>%
mutate(Value2=if_else(Value<0.1,0.1,Value)) %>%
mutate(Value = if_else(Group=='Absent',-1*Value,Value)) %>%
mutate(Value2 = if_else(Group=='Absent',-1*Value2,Value2))
p1 <- plotdata %>%
ggplot(aes(Value, y=Signature,fill=Signature))+
geom_col(width = 0.75,col='gray10',linewidth=0.3)+
geom_vline(xintercept = 0,col='gray10')+
geom_text(data = plotdata %>% filter(Group=='Present'),aes(label=Percentage),vjust=0.5,hjust=2)+
geom_text(data = plotdata %>% filter(Group=='Absent'),aes(label=Percentage),vjust=0.5,hjust=-1)+
scale_fill_manual(values = sigcol)+
scale_x_continuous(limits = c(-1,1),breaks = c(-1,-0.75,-0.5,-0.25,0,0.25,0.5,0.75,1),labels = c("100%","75%","50%","25%","0%","25%","50%","75%","100%"))+
labs(x='Prevalence by samples (%)',y=NULL,fill='Group')+
theme_ipsum_rc(base_size = 12,axis_title_just = 'm',axis_title_size = 12,grid = 'Y',ticks = T,plot_margin=margin(5.5,5.5,5.5,5.5))+
panel_border(color = 'gray10',size = 1)+
theme(legend.position = 'none',panel.grid.major.y = element_line(linetype = 2,colour = 'gray90'))
p1
testdata <- cn68_activity %>%
pivot_longer(-Tumor_Barcode) %>%
mutate(value=if_else(value>0,'Present','Absent')) %>%
mutate(value=factor(value,levels=c('Absent','Present'))) %>%
left_join(id2data) %>%
filter(Tumor_Barcode %in% hq_samples2,ID2_Present %in% c('Absent','Present')) %>%
mutate(ID2_Present = factor(ID2_Present, levels=c('Absent','Present'))) %>%
group_by(name) %>%
mutate(value=as.factor(value)) %>%
do(tresult = safely(stats::fisher.test)(.$value,.$ID2_Present)) %>%
mutate(tresult_null = map_lgl(tresult['result'], is.null)) %>%
filter(!tresult_null) %>%
mutate(fit = list(tidy(tresult[['result']]))) %>%
select(name,fit) %>%
unnest(cols = c(fit)) %>%
ungroup() %>%
arrange(p.value) %>%
mutate(FDR=p.adjust(p.value,method='BH'))
p2 <- testdata %>%
ggplot(aes(log2(estimate),-log10(FDR),fill=name))+
geom_hline(yintercept = -log10(0.05),col=ncicolpal[2],linetype=2)+
geom_hline(yintercept = -log10(0.01),col=ncicolpal[1],linetype=2)+
geom_vline(xintercept = 0,col="gray10",linetype=1,linewidth=0.5)+
geom_point(pch=21,size=3.5,col='black',stroke=0.2)+
ggrepel::geom_text_repel(data=testdata %>% filter(FDR<0.05),aes(label=name))+
scale_fill_manual(values = sigcol)+
scale_x_continuous(breaks = pretty_breaks(n = 7),limits = c(-4.5,4.5),position = "top")+
scale_y_continuous(breaks = pretty_breaks(n=7))+
theme_ipsum_rc(base_size = 12,axis_title_just = 'm',axis_title_size = 12,grid = F,ticks = T,plot_margin=margin(5.5,5.5,5.5,5.5))+
labs(x = 'Odd ratio (log2)', y = '-log10(FDR)\n')+
guides(fill="none")+
panel_border(color = 'black',linetype = 1,size=0.5)+
coord_cartesian(clip = 'off')
p2
plot_grid(p2,p1,align = 'v',axis = 'lr',ncol = 1,rel_heights = c(2,2))
#ggsave(filename = './output/CNV_signature_ID2_present_vs_absent.pdf',width = 6,height = 5.5,device = cairo_pdf)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 15.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] janitor_2.2.1 ggforce_0.5.0 ggtext_0.1.2 ggdist_3.3.3
## [5] survival_3.8-3 survminer_0.5.0 ggpubr_0.6.1 ggcorrplot_0.1.4.1
## [9] showtext_0.9-7 showtextdb_3.0 sysfonts_0.8.9 conflicted_1.2.0
## [13] ggsci_3.2.0 broom_1.0.8 ggpmisc_0.6.2 ggpp_0.5.9
## [17] ggasym_0.1.6 data.table_1.17.8 ggnewscale_0.5.2 ggrepel_0.9.6
## [21] cowplot_1.2.0 hrbrthemes_0.8.7 scales_1.4.0 ggsankey_0.0.99999
## [25] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [29] purrr_1.0.4 readr_2.1.5 tidyr_1.3.1 tibble_3.3.0
## [33] ggplot2_3.5.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] polynom_1.4-1 gridExtra_2.3 rlang_1.1.6
## [4] magrittr_2.0.3 snakecase_0.11.1 compiler_4.3.2
## [7] mgcv_1.9-1 systemfonts_1.2.3 vctrs_0.6.5
## [10] reshape2_1.4.4 quantreg_6.1 pkgconfig_2.0.3
## [13] fastmap_1.2.0 backports_1.5.0 labeling_0.4.3
## [16] KMsurv_0.1-6 utf8_1.2.6 rmarkdown_2.29
## [19] tzdb_0.5.0 ragg_1.4.0 MatrixModels_0.5-4
## [22] xfun_0.52 cachem_1.1.0 jsonlite_2.0.0
## [25] tweenr_2.0.3 R6_2.6.1 bslib_0.9.0
## [28] stringi_1.8.7 RColorBrewer_1.1-3 car_3.1-3
## [31] extrafontdb_1.0 jquerylib_0.1.4 Rcpp_1.1.0
## [34] knitr_1.50 zoo_1.8-14 extrafont_0.19
## [37] Matrix_1.6-5 splines_4.3.2 timechange_0.3.0
## [40] tidyselect_1.2.1 rstudioapi_0.17.1 dichromat_2.0-0.1
## [43] abind_1.4-8 yaml_2.3.10 lattice_0.22-7
## [46] plyr_1.8.9 withr_3.0.2 evaluate_1.0.4
## [49] polyclip_1.10-7 xml2_1.3.8 survMisc_0.5.6
## [52] pillar_1.11.0 carData_3.0-5 distributional_0.5.0
## [55] generics_0.1.4 hms_1.1.3 xtable_1.8-4
## [58] glue_1.8.0 gdtools_0.4.2 tools_4.3.2
## [61] SparseM_1.84-2 ggsignif_0.6.4 grid_4.3.2
## [64] Rttf2pt1_1.3.12 nlme_3.1-168 Formula_1.2-5
## [67] cli_3.6.5 km.ci_0.5-6 textshaping_1.0.1
## [70] fontBitstreamVera_0.1.1 gtable_0.3.6 rstatix_0.7.2
## [73] sass_0.4.10 digest_0.6.37 fontquiver_0.2.1
## [76] farver_2.1.2 memoise_2.0.1 htmltools_0.5.8.1
## [79] lifecycle_1.0.4 fontLiberation_0.1.0 gridtext_0.1.5
## [82] MASS_7.3-60.0.1